###################
## Nested Pseudo likelihood Estimation Heterogeneous ***WITH*** fe at parish level
## To be run after Mlogit_NAPPHeteroPlaceboCoordinatesv3.R
###################

rm(list=ls(all=TRUE))
set.seed(12345)
#root <- "S:/NetworkParish/HPC/"
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
#root <- "/Users/josealbertoguerra/Dropbox/NetworkParish2016/Occupational_Choice/"
setwd(paste(root,"Results/ReStat/Placebo/Coords/",sep=""))
allindexexer <- c("social")#c("all","civil","social")
for(indexexer in allindexexer) {
  indexexer <- "social" #c("social","civil","all") wee need to change manually 
  #codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/ReStat_R&R/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
  codes <- paste(root,"C_ANALYSIS/",sep="")
  source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
  source(paste(codes,"Mlogit_PlaceboCoordFunctions.R",sep=""))
  AM <- 0 #if using Aguirregabiria and Mira==1, 0 if using Kasahara and Shimotsu
  
  if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/Placebo/Coords/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/Placebo/Coords/KS/",sep="")}
  sink(paste(rootsave,"NAPPEstimationHetewfe",indexexer,"51-100.txt",sep=""), append=TRUE)
  options(max.print=1000000)
  gc()
  #####
  #Define matrix to keep coefficients from iteration
  ####
  maxiit <- 100
  L <- 6
  GeG <- as.matrix(matrix(rep(0,maxiit*(L)),nrow=maxiit))
  load(file=paste(rootsave,"PMLmtwfinalnew",indexexer,".RData",sep=""))
  GeG[1,] <- summary(ml.m.w)$coeff[paste("sw:",c(0:(L-1)),sep="")]
  #######
  #Load Data
  ii <- 2
  while(ii<101){
    print(ii)
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddataNAPPnew",indexexer,ii,".RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddataNAPPhomonew",indexexer,ii,".RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddatalistNAPPnew",indexexer,ii,".RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/pooleddatalistNAPPhomonew",indexexer,ii,".RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/datalistNAPPnew",indexexer,ii,".RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/coordslistNAPPnew",indexexer,ii,".RData",sep="" )) #coordslist, Lists of coordinates per parish
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/dneighboursNAPPnew",indexexer,ii,".RData",sep="")) #dneighbours, Lists of neigbours identities per parish
    load(file=paste(root,"NAPP/ReStat/Placebo/Coords/wlistNAPPnew",indexexer,ii,".RData",sep="")) #wlist, Sparse Lists of neighbours per parish
    
    #define which variables we include in the estimation and which formula we use
    xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc"
    wvar <- "sw"
    fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))          
    fformula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep="")))          
    fformula <- fformula1
    #namesmlexer <- c(paste("Mlmwnew",indexexer,".RData",sep=""),paste("Mlmtwnew",indexexer,".RData",sep=""))
    #sum(!is.na(fformulas))
    
    #Outer Iteration: Maximum likelihood
    alter <- sort(unique(pooleddata$alt))
    L <- length(alter)
    it0 <- 1 
    maxiter0 <- 50
    zero0 <- 1E-8
    Jeit <- matrix(rep(0,maxiter0*L),nrow=maxiter0) #Keep track of the coefficients
    ndJeit <- L
    
    #To keep original pooleddata to implement Weidner (2012) method
    pooleddata0 <- pooleddata
    datalist0 <- datalist
    pooleddata <- tolong(pooleddatalist,"choice",c("sw.")) # # to convert datalist class "data.table" into "mlogit.data"
    pooleddatalist0 <- pooleddatalist
    coordslist0 <- coordslist
    wlist0 <- wlist
    #wklist0 <- wklist
    
    #defining weight
    weigh <- wlist
    
    #To keep original pooleddata to implement Weidner (2012) method
    pooleddata00 <- pooleddata0
    
    #to define outer iteration of expectations
    ndexpectito <- L
    ndexpectitoall <- matrix(0,nrow=maxiter0) #Keep track of the tolerance level
    expeco <- subset(pooleddatalist,select=paste(wvar,".",alter,sep=""))
    
    while (it0<=maxiter0 & ndexpectito > zero0) {
      print(paste("ML iteration: ",it0, sep=""))
      pooleddata0 <- pooleddata
      datalist0 <- datalist
      pooleddatalist0 <- pooleddatalist
      
      #Estimating
      if (it0==1) {
        load(paste("Mlmtwnew",indexexer,ii,".RData",sep="")) 
        ml.m.w <- ml.mt.w    
        ml.m.w0 <- ml.m.w
      }  else  ml.m.w <- mnlogit(fformula, pooleddata, "alt")
      
      #Define starting conditions, if we want to give them staring values based on theta
      #if (it0==1) ml.m.w$coeff[paste(wvar,":",alter,sep="")] <- runif(L,2,4) 
      
      #Keeping coeff  
      Jeit[it0,] <- summary(ml.m.w)$coeff[paste(wvar,":",alter,sep="")] 
      print(Jeit[it0,])
      ndexpectitoall[it0,] <- ndexpectito 
      print(ndexpectitoall[it0,])
      
      
      #Inner iteration, fixed point in beliefs
      it <- 1 
      zero <- 1E-8
      #maxiter <- 1 #if iterating a la Aguirregabiria and Mira (2007) or a la Weidner (2012)
      maxiter <- 100 #If iterating with the fixed point a la Lee Lin Liu (2014)
      expectit <- matrix(rep(0,maxiter*L),nrow=maxiter)
      ndexpectit <- L
      
      #subsetting sample to 
      expec <- subset(pooleddatalist,select=paste(wvar,".",alter,sep=""))
      
      #iteration of beliefs
      while (it<=maxiter & ndexpectit > zero) {
        print(paste("Inner Iteration ",it,sep=""))    
        #keepint track of expectations
        expectit[it,] <- apply(expec,2,mean)
        
        alpha <- .1 #Kasahara and Shimotsu (2012)
        #Predict probabilities on the pooled data list
        #tempprob <- ppredict.mnlogit(ml.m.w,pooleddata,probability=TRUE)
        tempprob <- predict(ml.m.w,pooleddata,probability=TRUE)
        for(aa in sort(alter, decreasing=TRUE) ){
          ap <- which(alter==aa)
          if (aa != min(alter)) { #to guarantee KS probabilities are within [0,1] bounds
            if (AM==1) { 
              pooleddatalist[,paste(wvar,".",aa,sep="")] <- ml.m.w$probabilities[,ap] #Aguirregabiria and Mira (2007)
            } else {
              pooleddatalist[,paste(wvar,".",aa,sep="")] <- ((tempprob[,ap])^alpha)*((ml.m.w0$probabilities[,ap])^(1-alpha)) #Kasahara and Shimotsu (2012)
            }
          } else {
            if (AM==1) { 
              pooleddatalist[,paste(wvar,".",aa,sep="")] <- 1-apply(ml.m.w$probabilities[,paste(alter[alter!=min(alter)],sep="")],1,sum) 
            } else {
              pooleddatalist[,paste(wvar,".",aa,sep="")] <- 1-apply(((tempprob[,paste(alter[alter!=min(alter)],sep="")])^alpha)*((ml.m.w0$probabilities[,paste(alter[alter!=min(alter)],sep="")])^(1-alpha)),1,sum) #Kasahara and Shimotsu (2012)
            }
          }
          #pooleddatalist[,paste(wvar,".",aa,sep="")] <- predict(ml.m.w,pooleddata,probability=TRUE)[,ap]  #Aguirregabiria and Mira (2007)
          #Weidner
          #pooleddatalist[,paste(wvar,".",aa,sep="")] <- predict(ml.m.w,pooleddata00,probability=TRUE)[,ap]  #Weidner
        }
        
        #splitting the previous data into a list 
        datalist  <- split(pooleddatalist, list(pooleddatalist$IDSocial))
        
        #spatially weighting
        provweigh <- vector(length=L,mode="list")
        for(aa in alter){
          ap <- which(alter==aa)
          provweigh[[ap]] <- mapply(weightinga,datalist,weigh)
        }
        
        #updating pooled datalist
        pooleddatalist <- rbindlist(datalist)
        for(aa in alter){
          ap <- which(alter==aa)
          pooleddatalist[[paste(wvar,".",aa,sep="")]] <- unlist(provweigh[[ap]])
        }
        rm(provweigh)
        attr(pooleddatalist,"class") <- "data.frame"
        #updating expectations
        expec1 <- subset(pooleddatalist,select=paste(wvar,".",alter,sep=""))
        
        #Updating pooleddata (long format)
        for(aa in alter){
          pooleddata[pooleddata$alt==aa,][[paste(wvar,sep="")]] <- pooleddatalist[[paste(wvar,".",aa,sep="")]] 
        }
        
        if (it>=2) ndexpectit <- max(as.matrix(abs(expec1-expec))) # the maximum of all matrix entries ndexpectit <- sum(distr(expec,expec1)) #element by element
        print(paste("Max abs diff inner: ",ndexpectit,sep=""))
        expec <- expec1
        it <- it+1
      }
      
      if (it<=maxiter & ndexpectit < zero) {
        print(paste("Fixed point in beliefs convergence reached at iter: ",it-1,sep="")) 
        } else {
        print(paste("Fixed point in beliefs didn't reach convergence with new estimates: ",it,sep="")) #print(paste("Fixed point in beliefs FAILED to converge after it: ",it,sep=""))
        pooleddata <- pooleddata0
        datalist <- datalist0
        pooleddatalist <- pooleddatalist0 
        ml.m.w <- ml.m.w0
        it0 <- maxiter0
      }
      
      #updating expectations
      expec1o <- subset(pooleddatalist,select=paste(wvar,".",alter,sep=""))
      
      if (it0>=2) ndexpectito <- max(as.matrix(abs(expec1o-expeco))) # the maximum of all matrix entries ndJeit <- sum(abs(Jeit[it0,]-Jeit[it0-1,]))
      print(paste("Diff Abs max ",ndexpectito,sep=""))
      expeco <- expec1o
      
      #To keep the previous probabilities to perform Kasahara and Shimotsu
      ml.m.w0 <- ml.m.w
      it0 <- it0+1
    }
    if (it0 <= maxiter0 & it <= maxiter) print(paste("Outer ML Heterogenous iter converged at iter: ",it0-1," using weights: ",wvar,sep="")) else print(paste("ML stopped because belief convergence was not reached, reporting last estimates that converged using weights: ",wvar,sep=""))
    it0 <- min(it0,maxiter0+1)
    Jeit[it0-1,] <- summary(ml.m.w)$coeff[paste(wvar,":",alter,sep="")]
    print(Jeit[it0-1,])
    ndexpectitoall[it0-1,] <- ndexpectito 
    print(ndexpectitoall[it0-1,])
    
    #Saving last iteration results
    save(ml.m.w,file=paste(rootsave,"PMLmtwfinalnew",indexexer,".RData",sep=""))
    print(summary(ml.m.w))
    
    GeG[ii,] <- summary(ml.m.w)$coeff[paste("sw:",c(0:(L-1)),sep="")]
   ii <- ii+1 
  }
  dataGeG <- as.data.frame(GeG)
  save(dataGeG, file=paste(rootsave,"dataGeGAll",indexexer,".RData",sep=""))
  
  #Summary statistics
  apply(dataGeG, 2, function(x) quantile(x,probs=c(0.25,.5,.75)))
  
  #% smaller than benchmark estimates
  load(file=paste(root,"Results/ReStat/KS/","PMLmtwfinalnew.RData",sep=""))
  coeff <- summary(ml.m.w)$coeff[paste("sw",":",c(0:5),sep="")]
  for(ii in c(1:6)){
    print(apply(dataGeG, 2, function(x) sum(x<=coeff[ii])/length(x)))
  }
  ##Plot convergence in coefficientes 
  ##coefficients
  #colnames(Jeit)<-c(paste("J.",alter,sep=""))
  #dataJeit<-as.data.frame(Jeit[c(1:(it0-1)),])
  #dataJeit$iter <- c(1:nrow(dataJeit))
  # meltdataJeit<-melt(dataJeit,id="iter")
  # cairo_ps(paste(rootsave,"JePMLmtwfinalnew",indexexer,".eps",sep=""), width=7, height=7) #to keep transparency from the intervals
  # print(ggplot(meltdataJeit,aes(x=iter,y=value,colour=variable,group=variable)) + geom_line()+ ylab(expression(paste(J[y]^{t})))+xlab("t"))
  # dev.off()
  # save(dataJeit,file=paste(rootsave,"JePMLmtwfinalnew",indexexer,".RData",sep=""))
  # 
  # #Plot convergence in Infinity Norm
  # #coefficients
  # colnames(ndexpectitoall)<-"AbsMax"
  # dataInftyNorm<-as.data.frame(ndexpectitoall[c(3:(it0-1))])
  # colnames(dataInftyNorm)<-"AbsMax"
  # dataInftyNorm$iter <- c(1:nrow(dataInftyNorm))
  # meltdataInftyNorm<-melt(dataInftyNorm,id="iter")
  # cairo_ps(paste(rootsave,"InftyNormMLmtwfinalnew",indexexer,".eps",sep=""), width=7, height=7) #to keep transparency from the intervals
  # print(ggplot(meltdataInftyNorm,aes(x=iter,y=value,colour=variable,group=variable)) + geom_line()+ ylab(expression(InftyNorm))+xlab("t"))
  # dev.off()
  # save(dataInftyNorm,file=paste(rootsave,"InftyNormMLmtwfinalnew",indexexer,".RData",sep=""))
  
  sink()
}